# -*- coding:utf8 -*-
# !/usr/bin/env python

"""
Crawler for the National Enterprise Credit Information Publicity System
(Hong Kong) -- 全国企业信用信息公示系统（香港）.
"""

# import requesocks as requests
# import ghost
import requests
from bs4 import BeautifulSoup
import re
import sys
import json
import copy
import traceback
import Queue
import threading
import time

from scpy.logger import get_logger

import xg_table
import xg_template_dict

# Python 2 only: site.py removes sys.setdefaultencoding at interpreter start,
# so sys is reloaded to get the function back before making UTF-8 the
# process-wide default codec. The two statements must stay in this order.
reload(sys)
sys.setdefaultencoding("utf-8")

logger = get_logger(__file__)

# Endpoint that sent_data_to_server() POSTs its JSON payloads to.
# `url` is bound to the same string; it is not referenced elsewhere in the
# visible code.
post_url = url = 'http://120.55.112.88:7374/web'
# Default number of threads started per batch by task().
WORKER_NUM = 20
# Default exclusive upper bound of the number range walked by task().
TOTAL_NUM = 2348962

# User-Agent string installed on the session created in get_com_info().
ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36"
headers = {
    "User-Agent": ua,
}


def get_com_info(companyName, timeout=30):
    """Fetch the raw search-result page for one company.

    Despite the parameter name, the value is submitted as a company
    registration number: the form is posted with ``searchMode=BYCRNO`` and
    the value is placed in both ``query`` and ``queryCRNO``.

    Args:
        companyName: Search key sent to the registry (see above).
        timeout: Seconds to wait on each HTTP request, passed straight to
            ``requests``. Pass ``None`` to wait indefinitely, which is what
            this function did before the parameter existed.

    Returns:
        dict: Raw-result record whose ``html["base"]`` entry holds the
        response body of the search POST. ``keyword`` and ``companyName``
        are both set to the ``companyName`` argument.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    index_url_1 = "https://www.mobile-cr.gov.hk/mob/mobile.jsp"
    index_url_2 = "https://www.mobile-cr.gov.hk/mob/cps_search.jsp"
    com_url = "https://www.mobile-cr.gov.hk/mob/cps_criteria.do"

    req_data = {
        'nextAction': 'cps_criteria',
        'searchMode': 'BYCRNO',
        'cnsFlag': 'cps_criteria',
        'mode': 'BYCRNO',
        'query': companyName,
        'queryCRNO': companyName,
        'queryNAME': '',
        'language': '',
        'page': '1',
    }

    req = requests.session()
    # The session's default headers are replaced (not extended), as before;
    # a copy is installed so the module-level dict is never shared with the
    # session object.
    req.headers = dict(headers)
    try:
        # NOTE(security): verify=False disables TLS certificate checking for
        # every request below. Kept because the existing behaviour relies on
        # it, but it should be revisited.
        #
        # The two index pages are requested only for their effect on the
        # session (presumably cookies -- confirm); their bodies are unused.
        req.get(index_url_1, verify=False, timeout=timeout)
        req.get(index_url_2, verify=False, timeout=timeout)

        com_res = req.post(
            com_url, data=req_data, verify=False, timeout=timeout
        ).content
    finally:
        # Release pooled connections; this function is called once per
        # company, so leaked sessions would accumulate.
        req.close()

    # TODO: detect whether the company actually exists in the response.

    raw_dict = {
        "province": "xg",
        "type": "1",
        "html": {"base": com_res},
        "yearList": [],
        "keyword": companyName,
        "companyName": companyName,
        "json": "",
    }

    return raw_dict


def parse_base(raw_dict):
    """Extract the basic-information list from a raw crawl record.

    Args:
        raw_dict: Record as produced by get_com_info(); the HTML is read
            from ``raw_dict["html"]["base"]`` (missing keys yield ``""``).

    Returns:
        list: The non-empty result of ``xg_table.basic`` for the first
        ``<table class="info">`` in the page, with ``regCapcur`` of the
        first entry set to "港币".

    Raises:
        Exception: 'base info error' when the page has no ``info`` table or
            ``xg_table.basic`` returns nothing.
    """
    raw_base = raw_dict.get("html", {}).get("base", "")

    # Parse once and reuse the tree for both the debug dump and the lookup;
    # html5lib parsing is slow and was previously done twice.
    soup = BeautifulSoup(raw_base, "html5lib")
    print(soup)

    info_tables = soup.find_all("table", attrs={"class": "info"})
    if not info_tables:
        # Previously this surfaced as a bare IndexError from indexing [0].
        raise Exception('base info error')

    base_list = xg_table.basic(str(info_tables[0]))
    if not base_list:
        raise Exception('base info error')

    # The Hong Kong registry page carries no currency; default to HKD.
    base_list[0]["regCapcur"] = "港币"

    print(json.dumps(base_list, indent=4, ensure_ascii=False))
    return base_list


def parse(raw_dict):
    """Build the structured result for one raw crawl record.

    A deep copy of ``xg_template_dict.void_base_dict`` is filled with the
    list returned by parse_base() under ``basicList`` and tagged with
    ``province = "xg"``. Exceptions from parse_base() propagate.
    """
    result = copy.deepcopy(xg_template_dict.void_base_dict)

    result["basicList"] = parse_base(raw_dict)
    result["province"] = "xg"

    return result


def search_2(companyName, error_delay=20):
    """Crawl and parse one company.

    Args:
        companyName: Search key handed to get_com_info().
        error_delay: Seconds to sleep before re-raising when anything in
            the crawl/parse fails. Defaults to the previously hard-coded 20.

    Returns:
        tuple: ``(raw_dict, res_dict)`` -- the raw record from
        get_com_info() and the parsed record from parse(). When the parsed
        ``basicList`` has a truthy first entry, ``raw_dict["companyName"]``
        is overwritten with that entry's ``enterpriseName`` (or ``""`` if
        the key is absent).

    Raises:
        Exception: Whatever get_com_info() or parse() raised, re-raised
            unchanged after the delay.
    """
    logger.info(companyName)
    try:
        raw_dict = get_com_info(companyName)

        res_dict = parse(raw_dict)
        base_list = res_dict.get("basicList", [])
        if raw_dict and base_list and base_list[0]:
            raw_dict["companyName"] = base_list[0].get("enterpriseName", "")

        return raw_dict, res_dict

    except Exception:
        # Pause before propagating so a failing run does not immediately
        # issue the next request.
        time.sleep(error_delay)
        # Bare raise keeps the original traceback; `raise e` would restart
        # it at this line under Python 2.
        raise


def _post_to_server(data, attempts=30, timeout=60):
    """POST ``data`` as a JSON body to ``post_url``, retrying on any error.

    Returns True as soon as one request completes without raising and False
    if serialisation fails or every attempt raised. The HTTP status of the
    response is not inspected, matching the previous behaviour.
    """
    # Serialise once; the payload does not change between attempts.
    try:
        body = json.dumps(data)
    except Exception as e:
        print(e)
        return False

    for _ in range(attempts):
        try:
            # A timeout keeps a dead connection from blocking the worker
            # thread (and the batch join in task()) forever.
            requests.post(post_url, data=body, timeout=timeout)
            return True
        except Exception as e:
            print(e)
    return False


def sent_data_to_server(companyName):
    """Crawl one company and report the outcome to the collection server.

    On success the payload is ``{'data': (raw_dict, res_dict),
    'companyName': <name taken from raw_dict>}``. If search_2() raises, the
    error is logged, the traceback is printed, and
    ``{'companyName': companyName, 'exception': 1}`` is sent instead.
    Delivery is best-effort: failures to POST are printed and swallowed.

    Args:
        companyName: Search key handed to search_2().

    Returns:
        None
    """
    try:
        result = search_2(companyName)
    except Exception as e:
        logger.info(e)
        # print_exc() takes a `limit` as its first argument, not the
        # exception; it reads the active exception on its own.
        traceback.print_exc()
        _post_to_server({'companyName': companyName, 'exception': 1})
        return

    _post_to_server({'data': result, 'companyName': result[0]['companyName']})


def run_task(companyName):
    """Thread target used by task(): process a single company.

    Delegates to sent_data_to_server() and returns None.
    """
    sent_data_to_server(companyName)


def task(start_num=1, total_com=TOTAL_NUM, worker_num=WORKER_NUM):
    """Crawl every number in ``[start_num, total_com)`` in threaded batches.

    Each number is left-padded with zeros to at least 7 characters and
    handed to run_task() on its own thread. Up to ``worker_num`` threads are
    started per batch and all of them are joined before the next batch
    begins. The count of numbers still to process is printed at the start
    of every batch.

    Args:
        start_num: First number to crawl (inclusive).
        total_com: End of the range (exclusive).
            NOTE(review): with the default, TOTAL_NUM itself is never
            crawled -- confirm that the half-open range is intended.
        worker_num: Threads per batch; must be at least 1.

    Returns:
        bool: True once the range is exhausted.

    Raises:
        ValueError: If ``worker_num`` is below 1 (this used to loop forever
            whenever there was work to do).
    """
    if worker_num < 1:
        raise ValueError("worker_num must be >= 1")

    next_num = start_num
    while True:
        remaining = max(total_com - next_num, 0)
        print(remaining)

        # IDs are produced per batch instead of queueing the whole range
        # (about 2M strings with the defaults) up front.
        batch_size = min(worker_num, remaining)
        batch = [
            str(num).rjust(7, "0")
            for num in xrange(next_num, next_num + batch_size)
        ]

        workers = [
            threading.Thread(target=run_task, args=(cr_no,))
            for cr_no in batch
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        next_num += batch_size
        if next_num >= total_com:
            return True


def test(com_no):
    """Manual check: crawl one number and pretty-print the result.

    Prints the ``(raw_dict, res_dict)`` pair returned by search_2() as
    indented JSON. Example inputs: "2348962", "0000316".
    """
    raw_and_parsed = search_2(com_no)
    dumped = json.dumps(raw_and_parsed, indent=4, ensure_ascii=False)
    print(dumped)


if __name__ == "__main__":
    # Crawl numbers 0311108 through 0499999, one at a time.
    task(start_num=311108, total_com=500000, worker_num=1)
    # For a one-off manual check of a single number, run instead:
    #     test("0000316")
